home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Cream of the Crop 20
/
Cream of the Crop 20 (Terry Blount) (1996).iso
/
os2
/
xdsn217.zip
/
SAMPLES
/
SIMPLE
/
linnew.c
< prev
next >
Wrap
C/C++ Source or Header
|
1996-06-04
|
23KB
|
881 lines
/*
**
** LINPACK.C Linpack benchmark, calculates FLOPS.
** (FLoating Point Operations Per Second)
**
** Translated to C by Bonnie Toy 5/88
**
** Modified by Will Menninger, 10/93, with these features:
** (modified on 2/25/94 to fix a problem with daxpy for
** unequal increments or equal increments not equal to 1.
** Jack Dongarra)
**
** - Defaults to double precision.
** - Averages ROLLed and UNROLLed performance.
** - User selectable array sizes.
** - Automatically does enough repetitions to take at least 40 CPU seconds (the limit main() tests).
** - Prints machine precision.
** - ANSI prototyping.
**
** To compile: cc -O -o linpack linpack.c -lm
**
**
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <float.h>
#include <limits.h>
/*
** Precision selection.
**
** Define SP (single precision) or DP (double precision) -- for example
** with -DSP on the compiler command line -- to choose the type behind
** REAL.  When neither is defined the benchmark defaults to double
** precision.  Defining both is rejected, because the two branches
** below would otherwise give conflicting definitions of REAL.
*/
#if defined(SP) && defined(DP)
#error "Define only one of SP (single precision) or DP (double precision)."
#endif
#if !defined(SP) && !defined(DP)
#define DP
#endif
#ifdef SP
#define ZERO 0.0
#define ONE 1.0
#define PREC "Single"
#define BASE10DIG FLT_DIG
typedef float REAL;
#endif
#ifdef DP
#define ZERO 0.0e0
#define ONE 1.0e0
#define PREC "Double"
#define BASE10DIG DBL_DIG
typedef double REAL;
#endif
/* Benchmark driver (called from main) and the test-problem generator it uses. */
static REAL linpack (long nreps,int arsize);
static void matgen (REAL *a,int lda,int n,REAL *b,REAL *norma);
/* Factor and solve routines; linpack() passes 1 and then 0 as the trailing `roll` argument. */
static void dgefa (REAL *a,int lda,int n,int *ipvt,int *info,int roll);
static void dgesl (REAL *a,int lda,int n,int *ipvt,REAL *b,int job,int roll);
/* Vector kernels; presumably the _r names are the rolled variants -- TODO confirm against their definitions. */
static void daxpy_r (int n,REAL da,REAL *dx,int incx,REAL *dy,int incy);
static REAL ddot_r (int n,REAL *dx,int incx,REAL *dy,int incy);
static void dscal_r (int n,REAL da,REAL *dx,int incx);
/* Presumably the unrolled counterparts of the kernels above -- TODO confirm against their definitions. */
static void daxpy_ur (int n,REAL da,REAL *dx,int incx,REAL *dy,int incy);
static REAL ddot_ur (int n,REAL *dx,int incx,REAL *dy,int incy);
static void dscal_ur (int n,REAL da,REAL *dx,int incx);
static int idamax (int n,REAL *dx,int incx);
/* Timer that linpack() samples before and after each timed call. */
static REAL second (void);
/* Work buffer: allocated and freed by main(), split into matrix, vector and pivot areas by linpack(). */
static void *mempool;
/*
** Bytes needed for the work pool of one benchmark run: an arsize x arsize
** matrix of REAL, a vector of arsize REALs and a vector of arsize ints.
** Returns -1 when arsize is not positive or when the total does not fit
** in a long, so the caller never sees a wrapped-around size.
*/
static long pool_bytes(int arsize)
{
    unsigned long n,row_bytes;
    unsigned long max_bytes=(unsigned long)LONG_MAX;

    if (arsize<=0)
        return(-1L);
    n=(unsigned long)arsize;
    /*
    ** total = n*n*sizeof(REAL) + n*sizeof(REAL) + n*sizeof(int)
    **       = n * row_bytes, with row_bytes = (n+1)*sizeof(REAL)+sizeof(int).
    ** Both factors are range-checked by division before multiplying.
    */
    if (n+1>(max_bytes-sizeof(int))/sizeof(REAL))
        return(-1L);
    row_bytes=(unsigned long)((n+1)*sizeof(REAL)+sizeof(int));
    if (row_bytes>max_bytes/n)
        return(-1L);
    return((long)(n*row_bytes));
}

/*
** Interactive driver.
**
** Repeatedly prompts for an array size (empty line = 200, 'q'/'Q' or
** end of input = quit), rounds it down to an even number, allocates the
** work pool and calls linpack() with a doubling repetition count until
** linpack() reports a total time of at least 40.
**
** Returns 0 on normal termination.
*/
int main(void)
{
    char buf[80];
    int arsize;
    long requested,memreq,nreps;
    size_t malloc_arg;

    while (1)
    {
        printf("Enter array size (q to quit) [200]: ");
        /* End of input or a read error leaves buf unusable: stop instead
           of re-running the benchmark on stale or uninitialised data. */
        if (fgets(buf,sizeof buf,stdin)==NULL)
            break;
        if (buf[0]=='q' || buf[0]=='Q')
            break;
        if (buf[0]=='\0' || buf[0]=='\n')
            arsize=200;
        else
        {
            /* strtol saturates on overflow (atoi is undefined there);
               clamp into int range before narrowing. */
            requested=strtol(buf,NULL,10);
            if (requested>INT_MAX)
                requested=INT_MAX;
            else if (requested<0)
                requested=0;
            arsize=(int)requested;
        }
        /* Round down to an even size. */
        arsize/=2;
        arsize*=2;
        if (arsize<10)
        {
            printf("Too small.\n");
            continue;
        }
        mempool=NULL;
        memreq=pool_bytes(arsize);
        if (memreq>=0)
        {
            /* Same rounding as (memreq+512)>>10, without the addition
               that could overflow near LONG_MAX. */
            printf("Memory required: %ldK.\n",
                   memreq/1024L+(memreq%1024L>=512L ? 1L : 0L));
            malloc_arg=(size_t)memreq;
            /* Only allocate when size_t can hold the full request. */
            if ((unsigned long)malloc_arg==(unsigned long)memreq)
                mempool=malloc(malloc_arg);
        }
        if (mempool==NULL)
        {
            printf("Not enough memory available for given array size.\n\n");
            continue;
        }
        printf("\n\nLINPACK benchmark, %s precision.\n",PREC);
        printf("Machine precision: %d digits.\n",BASE10DIG);
        printf("Array size %d X %d.\n",arsize,arsize);
        printf("Average rolled and unrolled performance:\n\n");
        printf(" Reps Time(s) DGEFA DGESL OVERHEAD KFLOPS\n");
        printf("----------------------------------------------------\n");
        nreps=1;
        /* Double the repetition count until a run is long enough; stop
           doubling before nreps could overflow. */
        while (linpack(nreps,arsize)<40. && nreps<=LONG_MAX/2)
            nreps*=2;
        free(mempool);
        mempool=NULL;
        printf("\n");
    }
    return(0);
}
/*
** linpack runs one timed benchmark pass.
**
** The work pool (mempool) is split into an arsize x arsize matrix, a
** vector of arsize REALs and a pivot vector.  A system of order
** arsize/2 is generated, factored (dgefa) and solved (dgesl) nreps
** times with the roll flag set to 1, then nreps times with it set to 0.
** Only the dgefa and dgesl calls are timed individually; everything
** else counts as overhead.
**
** Returns the total time reported by second() for the whole pass and
** prints one result line, or returns 0 without printing when the pass
** was too short to be meaningful (total < 0.5 or factor+solve < 0.2).
*/
static REAL linpack(long nreps,int arsize)
{
    REAL *matrix,*rhs;
    REAL norma,mark,run_time,factor_time,solve_time,other_time,ops,kflops;
    int *pivots,order,info,lda,roll;
    long rep,cells;

    lda=arsize;
    order=arsize/2;
    cells=(long)arsize*(long)arsize;
    ops=((2.0*order*order*order)/3.0+2.0*order*order);

    /* Pool layout: matrix first, then the right-hand side, then pivots. */
    matrix=(REAL *)mempool;
    rhs=matrix+cells;
    pivots=(int *)&rhs[arsize];

    factor_time=0;
    solve_time=0;
    run_time=second();
    /* First all repetitions with roll==1, then all with roll==0. */
    for (roll=1;roll>=0;roll--)
    {
        for (rep=0;rep<nreps;rep++)
        {
            matgen(matrix,lda,order,rhs,&norma);
            mark=second();
            dgefa(matrix,lda,order,pivots,&info,roll);
            factor_time+=second()-mark;
            mark=second();
            dgesl(matrix,lda,order,pivots,rhs,0,roll);
            solve_time+=second()-mark;
        }
    }
    run_time=second()-run_time;

    /* Too short to report: tell the caller to try more repetitions. */
    if (run_time<0.5 || factor_time+solve_time<0.2)
        return(0.);

    /* The factor of 2 accounts for the two sets of nreps repetitions. */
    kflops=2.*nreps*ops/(1000.*(factor_time+solve_time));
    other_time=run_time-factor_time-solve_time;

    /* Clamp negative timings to zero before printing percentages. */
    if (factor_time<0.)
        factor_time=0.;
    if (solve_time<0.)
        solve_time=0.;
    if (other_time<0.)
        other_time=0.;

    printf("%8ld %6.2f %6.2f%% %6.2f%% %6.2f%% %9.3f\n",
           nreps,run_time,100.*factor_time/run_time,
           100.*solve_time/run_time,100.*other_time/run_time,
           kflops);
    return(run_time);
}
/*
** matgen builds the test problem.
**
** We would like to declare a[][lda], but C does not allow it, so the
** element a[j][i] lives at a[lda*j+i].
**
** on return
**
** a      the leading n x n part holds values from a fixed
**        pseudo-random sequence (seed 1325), each in [-2,2).
**
** b      b[i] is the sum over j of a[lda*j+i].
**
** norma  the largest generated element, but never less than 0.
*/
static void matgen(REAL *a,int lda,int n,REAL *b,REAL *norma)
{
    int seed=1325;
    int row,col;
    REAL *column;

    *norma=0.0;
    for (col=0;col<n;col++)
    {
        column=&a[lda*col];
        for (row=0;row<n;row++)
        {
            /* Generator step: seed stays in 0..65535. */
            seed=(int)((long)3125*(long)seed % 65536L);
            column[row]=(seed-32768.0)/16384.0;
            if (column[row]>*norma)
                *norma=column[row];
        }
    }

    for (row=0;row<n;row++)
        b[row]=0.0;

    /* Accumulate in the same j-then-i order so the sums round identically. */
    for (col=0;col<n;col++)
    {
        column=&a[lda*col];
        for (row=0;row<n;row++)
            b[row]+=column[row];
    }
}
/*
**
** DGEFA benchmark
**
** We would like to declare a[][lda], but c does not allow it. In this
** function, references to a[i][j] are written a[lda*i+j].
**
** dgefa factors a double precision matrix by gaussian elimination.
**
** dgefa is usually called by dgeco, but it can be called
** directly with a saving in time if rcond is not needed.
** (time for dgeco) = (1 + 9/n)*(time for dgefa) .
**
** on entry
**
** a REAL precision[n][lda]
** the matrix to be factored.
**
** lda integer
** the leading dimension of the array a .
**
** n integer
** the order of the matrix a .
**
** on return
**
** a an upper triangular matrix and the multipliers
** which were used to obtain it.
** the factorization can be written a = l*u where
** l is a product of permutation and unit lower
** triangular matrices and u is upper triangular.
**
** ipvt integer[n]
** an integer vector of pivot indices.
**
** info integer
** = 0 normal value.
** = k if u[k][k] .eq. 0.0 . this is not an error
** condition for this subroutine, but it does
** indicate that dgesl or dgedi will divide by zero
** if called. use rcond in dgeco for a reliable
** indication of singularity.
**
** linpack. this version dated 08/14/78 .
** cleve moler, university of New Mexico, argonne national lab.
**
** functions
**
** blas daxpy,dscal,idamax
**
*/
static void dgefa(REAL *a,int lda,int n,int *ipvt,int *info,int roll)
{
REAL t;
int idamax(),j,k,kp1,l,nm1;
/* gaussian elimination with partial pivoting */
if (roll)
{
*info = 0;
nm1 = n - 1;
if (nm1 >= 0)
for (k = 0; k < nm1; k++)
{
kp1 = k + 1;
/* find l = pivot index */
l = idamax(n-k,&a[lda*k+k],1) + k;
ipvt[k] = l;
/* zero pivot implies this column already
triangularized */
if (a[lda*k+l] != ZERO)
{
/* interchange if necessary */
if (l != k)
{
t = a[lda*k+l];
a[lda